Skip to content

Conversation

@arsenm
Copy link
Contributor

@arsenm arsenm commented Feb 25, 2025

No description provided.

Copy link
Contributor Author

arsenm commented Feb 25, 2025

@llvmbot
Copy link
Member

llvmbot commented Feb 25, 2025

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/128648.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp (+37-10)
  • (modified) llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll (+31-20)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 99016fdd0ff91..10deee1616a74 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1549,33 +1549,60 @@ Value *GCNTTIImpl::simplifyAMDGCNLaneIntrinsicDemanded(
   const unsigned LastElt = DemandedElts.getActiveBits() - 1;
   const unsigned MaskLen = LastElt - FirstElt + 1;
 
-  // TODO: Handle general subvector extract.
-  if (MaskLen != 1)
+  unsigned OldNumElts = VT->getNumElements();
+  if (MaskLen == OldNumElts && MaskLen != 1)
     return nullptr;
 
   Type *EltTy = VT->getElementType();
-  if (!isTypeLegal(EltTy))
+  Type *NewVT = MaskLen == 1 ? EltTy : FixedVectorType::get(EltTy, MaskLen);
+
+  // Theoretically we should support these intrinsics for any legal type. Avoid
+  // introducing cases that aren't direct register types like v3i16.
+  if (!isTypeLegal(NewVT))
     return nullptr;
 
   Value *Src = II.getArgOperand(0);
 
-  assert(FirstElt == LastElt);
-  Value *Extract = IC.Builder.CreateExtractElement(Src, FirstElt);
-
   // Make sure convergence tokens are preserved.
   // TODO: CreateIntrinsic should allow directly copying bundles
   SmallVector<OperandBundleDef, 2> OpBundles;
   II.getOperandBundlesAsDefs(OpBundles);
 
   Module *M = IC.Builder.GetInsertBlock()->getModule();
-  Function *Remangled = Intrinsic::getOrInsertDeclaration(
-      M, II.getIntrinsicID(), {Extract->getType()});
+  Function *Remangled =
+      Intrinsic::getOrInsertDeclaration(M, II.getIntrinsicID(), {NewVT});
+
+  if (MaskLen == 1) {
+    Value *Extract = IC.Builder.CreateExtractElement(Src, FirstElt);
+
+    // TODO: Preserve callsite attributes?
+    CallInst *NewCall = IC.Builder.CreateCall(Remangled, {Extract}, OpBundles);
+
+    Value *Result = IC.Builder.CreateInsertElement(
+        PoisonValue::get(II.getType()), NewCall, FirstElt);
+    IC.replaceInstUsesWith(II, Result);
+    IC.eraseInstFromFunction(II);
+    return Result;
+  }
+
+  SmallVector<int> ExtractMask(MaskLen, -1);
+  for (unsigned I = 0; I != MaskLen; ++I) {
+    if (DemandedElts[FirstElt + I])
+      ExtractMask[I] = FirstElt + I;
+  }
+
+  Value *Extract = IC.Builder.CreateShuffleVector(Src, ExtractMask);
 
   // TODO: Preserve callsite attributes?
   CallInst *NewCall = IC.Builder.CreateCall(Remangled, {Extract}, OpBundles);
 
-  Value *Result = IC.Builder.CreateInsertElement(PoisonValue::get(II.getType()),
-                                                 NewCall, FirstElt);
+  SmallVector<int> InsertMask(OldNumElts, -1);
+  for (unsigned I = 0; I != MaskLen; ++I) {
+    if (DemandedElts[FirstElt + I])
+      InsertMask[FirstElt + I] = I;
+  }
+
+  Value *Result = IC.Builder.CreateShuffleVector(NewCall, InsertMask);
   IC.replaceInstUsesWith(II, Result);
   IC.eraseInstFromFunction(II);
   return Result;
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
index e9d3b5e963b35..056caabb6d60a 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
@@ -64,8 +64,8 @@ define i16 @extract_elt2_v4i16_readfirstlane(<4 x i16> %src) {
 define <2 x i16> @extract_elt01_v4i16_readfirstlane(<4 x i16> %src) {
 ; CHECK-LABEL: define <2 x i16> @extract_elt01_v4i16_readfirstlane(
 ; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
 ;
   %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
@@ -76,8 +76,8 @@ define <2 x i16> @extract_elt01_v4i16_readfirstlane(<4 x i16> %src) {
 define <2 x i16> @extract_elt12_v4i16_readfirstlane(<4 x i16> %src) {
 ; CHECK-LABEL: define <2 x i16> @extract_elt12_v4i16_readfirstlane(
 ; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
 ;
   %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
@@ -88,8 +88,8 @@ define <2 x i16> @extract_elt12_v4i16_readfirstlane(<4 x i16> %src) {
 define <2 x i16> @extract_elt23_v4i16_readfirstlane(<4 x i16> %src) {
 ; CHECK-LABEL: define <2 x i16> @extract_elt23_v4i16_readfirstlane(
 ; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
 ;
   %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
@@ -100,8 +100,9 @@ define <2 x i16> @extract_elt23_v4i16_readfirstlane(<4 x i16> %src) {
 define <2 x i16> @extract_elt10_v4i16_readfirstlane(<4 x i16> %src) {
 ; CHECK-LABEL: define <2 x i16> @extract_elt10_v4i16_readfirstlane(
 ; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> <i32 1, i32 0>
 ; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
 ;
   %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
@@ -112,7 +113,9 @@ define <2 x i16> @extract_elt10_v4i16_readfirstlane(<4 x i16> %src) {
 define <2 x i16> @extract_elt32_v4i16_readfirstlane(<4 x i16> %src) {
 ; CHECK-LABEL: define <2 x i16> @extract_elt32_v4i16_readfirstlane(
 ; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
+; CHECK-NEXT:    [[VEC:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 1>
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 3, i32 2>
 ; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
 ;
@@ -258,8 +261,8 @@ define <3 x i16> @extract_elt123_v4i16_readfirstlane(<4 x i16> %src) {
 define <3 x i32> @extract_elt012_v4i32_readfirstlane(<4 x i32> %src) {
 ; CHECK-LABEL: define <3 x i32> @extract_elt012_v4i32_readfirstlane(
 ; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[SRC]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret <3 x i32> [[SHUFFLE]]
 ;
   %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
@@ -270,8 +273,8 @@ define <3 x i32> @extract_elt012_v4i32_readfirstlane(<4 x i32> %src) {
 define <3 x i32> @extract_elt123_v4i32_readfirstlane(<4 x i32> %src) {
 ; CHECK-LABEL: define <3 x i32> @extract_elt123_v4i32_readfirstlane(
 ; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[SRC]], <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret <3 x i32> [[SHUFFLE]]
 ;
   %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
@@ -282,7 +285,9 @@ define <3 x i32> @extract_elt123_v4i32_readfirstlane(<4 x i32> %src) {
 define <2 x i32> @extract_elt13_v4i32_readfirstlane(<4 x i32> %src) {
 ; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane(
 ; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[SRC]], <4 x i32> poison, <3 x i32> <i32 1, i32 poison, i32 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]])
+; CHECK-NEXT:    [[VEC:%.*]] = shufflevector <3 x i32> [[TMP2]], <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
 ; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
 ;
@@ -321,8 +326,9 @@ define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(i32 %src0,
 ; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(
 ; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
-; CHECK-NEXT:    [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
-; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP2]])
+; CHECK-NEXT:    [[VEC:%.*]] = shufflevector <3 x i32> [[TMP3]], <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
 ; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
 ;
@@ -364,8 +370,9 @@ define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergenc
 ; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:    [[T:%.*]] = call token @llvm.experimental.convergence.entry()
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
-; CHECK-NEXT:    [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
-; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]]) [ "convergencectrl"(token [[T]]) ]
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP2]]) [ "convergencectrl"(token [[T]]) ]
+; CHECK-NEXT:    [[VEC:%.*]] = shufflevector <3 x i32> [[TMP3]], <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
 ; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
 ;
@@ -404,7 +411,9 @@ define <2 x i1> @extract_elt01_v4i1_readfirstlane(<4 x i1> %src) {
 define <2 x i32> @extract_elt13_v8i32_readfirstlane(<8 x i32> %src) {
 ; CHECK-LABEL: define <2 x i32> @extract_elt13_v8i32_readfirstlane(
 ; CHECK-SAME: <8 x i32> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VEC:%.*]] = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> [[SRC]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[SRC]], <8 x i32> poison, <3 x i32> <i32 1, i32 poison, i32 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]])
+; CHECK-NEXT:    [[VEC:%.*]] = shufflevector <3 x i32> [[TMP2]], <3 x i32> poison, <8 x i32> <i32 poison, i32 0, i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> poison, <2 x i32> <i32 1, i32 3>
 ; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
 ;
@@ -428,7 +437,9 @@ define <2 x i32> @extract_elt03_v4i32_readfirstlane(<4 x i32> %src) {
 define <3 x i32> @extract_elt124_v8i32_readfirstlane(<8 x i32> %src) {
 ; CHECK-LABEL: define <3 x i32> @extract_elt124_v8i32_readfirstlane(
 ; CHECK-SAME: <8 x i32> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VEC:%.*]] = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> [[SRC]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[SRC]], <8 x i32> poison, <4 x i32> <i32 1, i32 2, i32 poison, i32 4>
+; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[TMP1]])
+; CHECK-NEXT:    [[VEC:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> <i32 poison, i32 0, i32 1, i32 poison, i32 3, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> poison, <3 x i32> <i32 1, i32 2, i32 4>
 ; CHECK-NEXT:    ret <3 x i32> [[SHUFFLE]]
 ;

@arsenm arsenm marked this pull request as ready for review February 25, 2025 07:44
@llvmbot llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels Feb 25, 2025
@arsenm arsenm force-pushed the users/arsenm/amdgpu/simplify-demanded-vector-elts-readfirstlane branch from bfe67bc to c80695c Compare February 28, 2025 06:03
@arsenm arsenm force-pushed the users/arsenm/amdgpu/simplify-demanded-vector-elts-readfirstlane-subvectors branch from 0c05270 to ce66b73 Compare February 28, 2025 06:03
Base automatically changed from users/arsenm/amdgpu/simplify-demanded-vector-elts-readfirstlane to main March 5, 2025 01:35
@arsenm arsenm force-pushed the users/arsenm/amdgpu/simplify-demanded-vector-elts-readfirstlane-subvectors branch from ce66b73 to 2500163 Compare March 5, 2025 01:37
@arsenm arsenm force-pushed the users/arsenm/amdgpu/simplify-demanded-vector-elts-readfirstlane-subvectors branch from 2500163 to 632f45d Compare March 5, 2025 10:00
@arsenm
Copy link
Contributor Author

arsenm commented Mar 7, 2025

ping

SmallVector<int> InsertMask(OldNumElts, -1);
for (unsigned I = 0; I != MaskLen; ++I) {
if (DemandedElts[FirstElt + I])
InsertMask[FirstElt + I] = I;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Can we not fold this in upper loop ?

Copy link

@pravinjagtap pravinjagtap left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@arsenm arsenm merged commit af755af into main Mar 7, 2025
11 checks passed
@arsenm arsenm deleted the users/arsenm/amdgpu/simplify-demanded-vector-elts-readfirstlane-subvectors branch March 7, 2025 10:54
@llvm-ci
Copy link
Collaborator

llvm-ci commented Mar 7, 2025

LLVM Buildbot has detected a new failure on builder ml-opt-devrel-x86-64 running on ml-opt-devrel-x86-64-b1 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/175/builds/14477

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /b/ml-opt-devrel-x86-64-b1/build/bin/opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine < /b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll | /b/ml-opt-devrel-x86-64-b1/build/bin/FileCheck /b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
+ /b/ml-opt-devrel-x86-64-b1/build/bin/opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine
+ /b/ml-opt-devrel-x86-64-b1/build/bin/FileCheck /b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
/b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll:373:15: error: CHECK-NEXT: expected string not found in input
; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
              ^
<stdin>:191:55: note: scanning from here
 %1 = insertelement <4 x i32> poison, i32 %src0, i64 0
                                                      ^
<stdin>:191:55: note: with "TMP1" equal to "%1"
 %1 = insertelement <4 x i32> poison, i32 %src0, i64 0
                                                      ^
<stdin>:194:2: note: possible intended match here
 %4 = shufflevector <3 x i32> %3, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
 ^

Input file: <stdin>
Check file: /b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
            .
            .
            .
          186: } 
          187:  
          188: ; Function Attrs: convergent 
          189: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergencetoken(i32 %src0, i32 %src2) #1 { 
          190:  %t = call token @llvm.experimental.convergence.entry() 
          191:  %1 = insertelement <4 x i32> poison, i32 %src0, i64 0 
next:373'0                                                           X error: no match found
next:373'1                                                             with "TMP1" equal to "%1"
          192:  %2 = shufflevector <4 x i32> %1, <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          193:  %3 = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> %2) [ "convergencectrl"(token %t) ] 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          194:  %4 = shufflevector <3 x i32> %3, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
next:373'2      ?                                                                                                    possible intended match
          195:  %shuffle = shufflevector <4 x i32> %4, <4 x i32> poison, <2 x i32> <i32 1, i32 3> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          196:  ret <2 x i32> %shuffle 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~
...

@llvm-ci
Copy link
Collaborator

llvm-ci commented Mar 7, 2025

LLVM Buildbot has detected a new failure on builder ml-opt-rel-x86-64 running on ml-opt-rel-x86-64-b1 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/185/builds/14398

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /b/ml-opt-rel-x86-64-b1/build/bin/opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine < /b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll | /b/ml-opt-rel-x86-64-b1/build/bin/FileCheck /b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
+ /b/ml-opt-rel-x86-64-b1/build/bin/opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine
+ /b/ml-opt-rel-x86-64-b1/build/bin/FileCheck /b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
/b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll:373:15: error: CHECK-NEXT: expected string not found in input
; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
              ^
<stdin>:191:55: note: scanning from here
 %1 = insertelement <4 x i32> poison, i32 %src0, i64 0
                                                      ^
<stdin>:191:55: note: with "TMP1" equal to "%1"
 %1 = insertelement <4 x i32> poison, i32 %src0, i64 0
                                                      ^
<stdin>:194:2: note: possible intended match here
 %4 = shufflevector <3 x i32> %3, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
 ^

Input file: <stdin>
Check file: /b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
            .
            .
            .
          186: } 
          187:  
          188: ; Function Attrs: convergent 
          189: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergencetoken(i32 %src0, i32 %src2) #1 { 
          190:  %t = call token @llvm.experimental.convergence.entry() 
          191:  %1 = insertelement <4 x i32> poison, i32 %src0, i64 0 
next:373'0                                                           X error: no match found
next:373'1                                                             with "TMP1" equal to "%1"
          192:  %2 = shufflevector <4 x i32> %1, <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          193:  %3 = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> %2) [ "convergencectrl"(token %t) ] 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          194:  %4 = shufflevector <3 x i32> %3, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
next:373'2      ?                                                                                                    possible intended match
          195:  %shuffle = shufflevector <4 x i32> %4, <4 x i32> poison, <2 x i32> <i32 1, i32 3> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          196:  ret <2 x i32> %shuffle 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~
...

@llvm-ci
Copy link
Collaborator

llvm-ci commented Mar 7, 2025

LLVM Buildbot has detected a new failure on builder ml-opt-dev-x86-64 running on ml-opt-dev-x86-64-b1 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/137/builds/14655

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /b/ml-opt-dev-x86-64-b1/build/bin/opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine < /b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll | /b/ml-opt-dev-x86-64-b1/build/bin/FileCheck /b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
+ /b/ml-opt-dev-x86-64-b1/build/bin/opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine
+ /b/ml-opt-dev-x86-64-b1/build/bin/FileCheck /b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
/b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll:373:15: error: CHECK-NEXT: expected string not found in input
; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
              ^
<stdin>:191:55: note: scanning from here
 %1 = insertelement <4 x i32> poison, i32 %src0, i64 0
                                                      ^
<stdin>:191:55: note: with "TMP1" equal to "%1"
 %1 = insertelement <4 x i32> poison, i32 %src0, i64 0
                                                      ^
<stdin>:194:2: note: possible intended match here
 %4 = shufflevector <3 x i32> %3, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
 ^

Input file: <stdin>
Check file: /b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
            .
            .
            .
          186: } 
          187:  
          188: ; Function Attrs: convergent 
          189: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergencetoken(i32 %src0, i32 %src2) #1 { 
          190:  %t = call token @llvm.experimental.convergence.entry() 
          191:  %1 = insertelement <4 x i32> poison, i32 %src0, i64 0 
next:373'0                                                           X error: no match found
next:373'1                                                             with "TMP1" equal to "%1"
          192:  %2 = shufflevector <4 x i32> %1, <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          193:  %3 = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> %2) [ "convergencectrl"(token %t) ] 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          194:  %4 = shufflevector <3 x i32> %3, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
next:373'2      ?                                                                                                    possible intended match
          195:  %shuffle = shufflevector <4 x i32> %4, <4 x i32> poison, <2 x i32> <i32 1, i32 3> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          196:  ret <2 x i32> %shuffle 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~
...

@jplehr
Copy link
Contributor

jplehr commented Mar 7, 2025

This turned some of our bots red, e.g., https://lab.llvm.org/buildbot/#/builders/140/builds/18458

@llvm-ci
Copy link
Collaborator

llvm-ci commented Mar 7, 2025

LLVM Buildbot has detected a new failure on builder lld-x86_64-ubuntu-fast running on as-builder-4 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/33/builds/12608

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/build/bin/opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine < /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll | /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/build/bin/FileCheck /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
+ /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/build/bin/opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine
+ /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/build/bin/FileCheck /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
/home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll:373:15: error: CHECK-NEXT: expected string not found in input
; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
              ^
<stdin>:191:55: note: scanning from here
 %1 = insertelement <4 x i32> poison, i32 %src0, i64 0
                                                      ^
<stdin>:191:55: note: with "TMP1" equal to "%1"
 %1 = insertelement <4 x i32> poison, i32 %src0, i64 0
                                                      ^
<stdin>:194:2: note: possible intended match here
 %4 = shufflevector <3 x i32> %3, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
 ^

Input file: <stdin>
Check file: /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
            .
            .
            .
          186: } 
          187:  
          188: ; Function Attrs: convergent 
          189: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergencetoken(i32 %src0, i32 %src2) #1 { 
          190:  %t = call token @llvm.experimental.convergence.entry() 
          191:  %1 = insertelement <4 x i32> poison, i32 %src0, i64 0 
next:373'0                                                           X error: no match found
next:373'1                                                             with "TMP1" equal to "%1"
          192:  %2 = shufflevector <4 x i32> %1, <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          193:  %3 = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> %2) [ "convergencectrl"(token %t) ] 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          194:  %4 = shufflevector <3 x i32> %3, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
next:373'2      ?                                                                                                    possible intended match
          195:  %shuffle = shufflevector <4 x i32> %4, <4 x i32> poison, <2 x i32> <i32 1, i32 3> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          196:  ret <2 x i32> %shuffle 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~
...

@llvm-ci
Copy link
Collaborator

llvm-ci commented Mar 7, 2025

LLVM Buildbot has detected a new failure on builder premerge-monolithic-linux running on premerge-linux-1 while building llvm at step 7 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/153/builds/24990

Here is the relevant piece of the build log for the reference
Step 7 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /build/buildbot/premerge-monolithic-linux/build/bin/opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine < /build/buildbot/premerge-monolithic-linux/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll | /build/buildbot/premerge-monolithic-linux/build/bin/FileCheck /build/buildbot/premerge-monolithic-linux/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
+ /build/buildbot/premerge-monolithic-linux/build/bin/opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine
+ /build/buildbot/premerge-monolithic-linux/build/bin/FileCheck /build/buildbot/premerge-monolithic-linux/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
/build/buildbot/premerge-monolithic-linux/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll:373:15: error: CHECK-NEXT: expected string not found in input
; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
              ^
<stdin>:191:55: note: scanning from here
 %1 = insertelement <4 x i32> poison, i32 %src0, i64 0
                                                      ^
<stdin>:191:55: note: with "TMP1" equal to "%1"
 %1 = insertelement <4 x i32> poison, i32 %src0, i64 0
                                                      ^
<stdin>:194:2: note: possible intended match here
 %4 = shufflevector <3 x i32> %3, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
 ^

Input file: <stdin>
Check file: /build/buildbot/premerge-monolithic-linux/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
            .
            .
            .
          186: } 
          187:  
          188: ; Function Attrs: convergent 
          189: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergencetoken(i32 %src0, i32 %src2) #1 { 
          190:  %t = call token @llvm.experimental.convergence.entry() 
          191:  %1 = insertelement <4 x i32> poison, i32 %src0, i64 0 
next:373'0                                                           X error: no match found
next:373'1                                                             with "TMP1" equal to "%1"
          192:  %2 = shufflevector <4 x i32> %1, <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          193:  %3 = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> %2) [ "convergencectrl"(token %t) ] 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          194:  %4 = shufflevector <3 x i32> %3, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
next:373'2      ?                                                                                                    possible intended match
          195:  %shuffle = shufflevector <4 x i32> %4, <4 x i32> poison, <2 x i32> <i32 1, i32 3> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          196:  ret <2 x i32> %shuffle 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~
...

@llvm-ci
Copy link
Collaborator

llvm-ci commented Mar 7, 2025

LLVM Buildbot has detected a new failure on builder llvm-x86_64-debian-dylib running on gribozavr4 while building llvm at step 7 "test-build-unified-tree-check-llvm".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/60/builds/21386

Here is the relevant piece of the build log for the reference
Step 7 (test-build-unified-tree-check-llvm) failure: test (failure)
******************** TEST 'LLVM :: Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /b/1/llvm-x86_64-debian-dylib/build/bin/opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine < /b/1/llvm-x86_64-debian-dylib/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll | /b/1/llvm-x86_64-debian-dylib/build/bin/FileCheck /b/1/llvm-x86_64-debian-dylib/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
+ /b/1/llvm-x86_64-debian-dylib/build/bin/FileCheck /b/1/llvm-x86_64-debian-dylib/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
+ /b/1/llvm-x86_64-debian-dylib/build/bin/opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine
/b/1/llvm-x86_64-debian-dylib/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll:373:15: error: CHECK-NEXT: expected string not found in input
; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
              ^
<stdin>:191:55: note: scanning from here
 %1 = insertelement <4 x i32> poison, i32 %src0, i64 0
                                                      ^
<stdin>:191:55: note: with "TMP1" equal to "%1"
 %1 = insertelement <4 x i32> poison, i32 %src0, i64 0
                                                      ^
<stdin>:194:2: note: possible intended match here
 %4 = shufflevector <3 x i32> %3, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
 ^

Input file: <stdin>
Check file: /b/1/llvm-x86_64-debian-dylib/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
            .
            .
            .
          186: } 
          187:  
          188: ; Function Attrs: convergent 
          189: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergencetoken(i32 %src0, i32 %src2) #1 { 
          190:  %t = call token @llvm.experimental.convergence.entry() 
          191:  %1 = insertelement <4 x i32> poison, i32 %src0, i64 0 
next:373'0                                                           X error: no match found
next:373'1                                                             with "TMP1" equal to "%1"
          192:  %2 = shufflevector <4 x i32> %1, <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          193:  %3 = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> %2) [ "convergencectrl"(token %t) ] 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          194:  %4 = shufflevector <3 x i32> %3, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
next:373'2      ?                                                                                                    possible intended match
          195:  %shuffle = shufflevector <4 x i32> %4, <4 x i32> poison, <2 x i32> <i32 1, i32 3> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          196:  ret <2 x i32> %shuffle 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~
...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:AMDGPU llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms

Projects

None yet

Development

Successfully merging this pull request may close these issues.

6 participants